{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 常用的内建模块\n",
    "\"batteries included\"，Python内置了很多有用的模块"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1 datetime\n",
    "处理日期和时间的标准库"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "###  1. 获取当前日期和时间"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.当前日期和时间: 2018-08-14 13:36:03.679281 <class 'datetime.datetime'>\n"
     ]
    }
   ],
   "source": [
    "from datetime import datetime\n",
    "now = datetime.now()\n",
    "print('1.当前日期和时间:', now, type(now))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2.获取指定日期和时间"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.指定的日期和时间: 2018-07-07 12:07:00\n"
     ]
    }
   ],
   "source": [
    "from datetime import datetime\n",
    "dt = datetime(2018, 7, 7, 12, 7)   # 用指定的时间创建一个datetime实例\n",
    "print('1.指定的日期和时间:', dt)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "###  3.datetime转换为timestamp\n",
    "自1970.01.01 00:00:00开始到当前的**秒数**称为timestamp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.指定日期的timestamp: 1530936420.0\n"
     ]
    }
   ],
   "source": [
    "dt = datetime(2018, 7, 7, 12, 7)  # 用指定的时间创建一个datetime实例\n",
    "print('1.指定日期的timestamp:', dt.timestamp())   # 把datetime转换为timestamp,+，小数点后的是毫秒"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 4.timestamp转换为datetime"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1994-08-25 09:22:57\n"
     ]
    }
   ],
   "source": [
    "t = 777777777\n",
    "print(datetime.fromtimestamp(t))  # 将积累的秒数转换为日期"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "一般的timestamp是不包含时区信息的，北京时间为：\n",
    "2018-08-11 19:51:13 UTC+8:00\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.本地时间: 2018-08-11 20:12:21.694892\n",
      "2.标准时间(UTC时间): 2018-08-11 12:12:21.694892\n"
     ]
    }
   ],
   "source": [
    "# 将本地时间转换到UTC标准时间\n",
    "now = 1533989541.694892                    # 当前的时间timestamp\n",
    "local_time = datetime.fromtimestamp(now)\n",
    "print('1.本地时间:', local_time)\n",
    "standard_time = datetime.utcfromtimestamp(now)   #北京是东八区，差8个小时\n",
    "print('2.标准时间(UTC时间):', standard_time)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 5.str与datetime互相转换"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.字符串转为日期: 2018-07-01 18:20:20 <class 'datetime.datetime'>\n"
     ]
    }
   ],
   "source": [
    "# 1. str转换到datetime，没有时区信息\n",
    "from datetime import datetime\n",
    "time_str = '2018-7-1 18:20:20'      # 日期的字符串\n",
    "time_format = '%Y-%m-%d %H:%M:%S'   # 时间显示格式\n",
    "cday = datetime.strptime(time_str, time_format)\n",
    "print('1.字符串转为日期:', cday, type(cday))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.日期转字符串: Tue, Aug 14 13:36 <class 'str'>\n"
     ]
    }
   ],
   "source": [
    "# 2. datetime转换到str\n",
    "now = datetime.now()\n",
    "time_format = '%a, %b %d %H:%M'\n",
    "time_str = now.strftime(time_format)\n",
    "print('1.日期转字符串:', time_str, type(time_str))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 6.datetime加减"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.当前日期和时间: 2018-08-14 13:36:03.789575\n",
      "2.快进2个小时: 2018-08-14 15:46:03.789575\n",
      "3.快退后的时间: 2018-08-13 12:34:43.789575\n"
     ]
    }
   ],
   "source": [
    "# 使用timedelta函数移动日期\n",
    "from datetime import datetime, timedelta\n",
    "now = datetime.now()\n",
    "print('1.当前日期和时间:', now)\n",
    "new_time = now + timedelta(hours=2, minutes=10)    # 直接使用+，可以快速推进时间\n",
    "print('2.快进2个小时:', new_time)\n",
    "new_time = now - timedelta(days=1, hours=1, minutes=1, seconds=20) \n",
    "print('3.快退后的时间:', new_time)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 7.时区转换"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "本地时间转换为UTC标准时间\n",
    "\n",
    "北京时间：UTC+8:00，UTC时间:UTC+0:00"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.新建时区: UTC+08:00\n",
      "2.当前日期和时间: 2018-08-14 13:36:03.809630\n",
      "3.添加时区信息后的时间: 2018-08-14 13:36:03 UTC+08:00\n"
     ]
    }
   ],
   "source": [
    "# 通过timezone为时间添加时区信息，初始化时区属性tzinfo，默认为None\n",
    "from datetime import datetime, timezone, timedelta\n",
    "tz_utc_8 = timezone(timedelta(hours=8))    # 创建时区UTC+8:00，如果是UTC-8:00直接将hours设为负数即可\n",
    "print('1.新建时区:', tz_utc_8)\n",
    "now = datetime.now()\n",
    "print('2.当前日期和时间:', now)\n",
    "new_time = now.replace(tzinfo=tz_utc_8)    # 添加时区信息\n",
    "print('3.添加时区信息后的时间:', new_time.strftime('%Y-%m-%d %H:%M:%S %Z'))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "时区转换"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.当前UTC的日期和时间: 2018-08-14 05:36:03.821663+00:00\n",
      "2.北京的日期和时间: 2018-08-14 13:36:03.821663+08:00\n",
      "3.东京的日期和时间: 2018-08-14 14:36:03.821663+09:00\n",
      "  从北京得到东京的日期和时间: 2018-08-14 14:36:03.821663+09:00\n"
     ]
    }
   ],
   "source": [
    "# 先用utcnow()获取当前UTC的时间，再做转换\n",
    "utc_dt = datetime.utcnow().replace(tzinfo=timezone.utc)\n",
    "print('1.当前UTC的日期和时间:', utc_dt)\n",
    "# 将时区转换为北京\n",
    "bj_dt = utc_dt.astimezone(timezone(timedelta(hours=8)))       # 直接从UTC时区转换\n",
    "print('2.北京的日期和时间:', bj_dt)\n",
    "# 将时区转还为东京\n",
    "tokyo_dt = utc_dt.astimezone(timezone(timedelta(hours=9)))    # 直接从UTC时区转换\n",
    "print('3.东京的日期和时间:', tokyo_dt)\n",
    "tokyo_dt2 = bj_dt.astimezone(timezone(timedelta(hours=9)))    # 从其他时区得到当前时区时间\n",
    "print('  从北京得到东京的日期和时间:', tokyo_dt2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "小结:\n",
    "- timestamp的值与时区无关\n",
    "- datetime需要添加时区信息才能得到一个特点的时间，否则当做本地时间"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "字符串转时间练习"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1433121030.0\n",
      "1433121030.0\n",
      "ok\n"
     ]
    }
   ],
   "source": [
    "# 将输入的日期和时区字符串信息转换为timestamp\n",
    "# 方法一 使用列表切片\n",
    "from datetime import datetime, timezone, timedelta\n",
    "def str2timestamp(dt_str, tz_str):\n",
    "    dt = datetime.strptime(dt_str, '%Y-%m-%d %H:%M:%S')    # 将str转换为datetime\n",
    "    hours_num = int(tz_str[4:tz_str.index(':')])           # 提取出时区中的数\n",
    "    flag = 1 if tz_str[3] == '+' else -1                   # 分辨时区的加或减\n",
    "    dt_zone = timezone(timedelta(hours=flag*hours_num))    # 设置时区\n",
    "    new_dt = dt.replace(tzinfo=dt_zone)                    # 添加时区的新时间\n",
    "    print(new_dt.timestamp())\n",
    "    return new_dt.timestamp()\n",
    "\n",
    "t1 = str2timestamp('2015-6-1 08:10:30', 'UTC+7:00')\n",
    "assert t1 == 1433121030.0, t1\n",
    "t2 = str2timestamp('2015-5-31 16:10:30', 'UTC-09:00')\n",
    "assert t2 == 1433121030.0, t2\n",
    "print('ok')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ok\n"
     ]
    }
   ],
   "source": [
    "# 方法二 使用正则表达式\n",
    "from datetime import datetime, timezone, timedelta\n",
    "import re                # 正则表达式模块\n",
    "def str2timestamp(dt_str, tz_str):\n",
    "    dt = datetime.strptime(dt_str, '%Y-%m-%d %H:%M:%S') # dt_st要与格式相匹配\n",
    "#     print(dt)\n",
    "    zone_hours = int(re.match(r'^UTC([-+\\d]+):\\d+$', tz_str).group(1))   # 使用正则表达式提取hours\n",
    "    dt_zone = timezone(timedelta(hours=zone_hours))\n",
    "    new_dt = dt.replace(tzinfo=dt_zone)\n",
    "#     print(new_dt)\n",
    "    return new_dt.timestamp()\n",
    "t1 = str2timestamp('2015-6-1 08:10:30', 'UTC+7:00')\n",
    "assert t1 == 1433121030.0, t1\n",
    "t2 = str2timestamp('2015-5-31 16:10:30', 'UTC-09:00')\n",
    "assert t2 == 1433121030.0, t2\n",
    "print('ok')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2 colllections"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Python的集合模块，提供许多有用的集合类"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1.namedtuple\n",
    "namedtuple('名称', [属性list])\n",
    "\n",
    "为元组提供名称\n",
    "例子：p=(1,2)，无法知道1,2的意义，如果定义类来表示元素的意义会增加复杂性，所以使用namedtuple"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.点的坐标为: 1 2\n",
      "2.namedtuple的类型: <class '__main__.Point_name'>\n",
      "3.namedtuple既是Point也是tuple: True True\n"
     ]
    }
   ],
   "source": [
    "# 为元组提供意义\n",
    "from collections import namedtuple\n",
    "Point = namedtuple('Point_name', ['x', 'y'])  # Point_name只是表示该元组的类型名称，可以随意取\n",
    "p = Point(1, 2)\n",
    "print('1.点的坐标为:', p.x, p.y)\n",
    "print('2.namedtuple的类型:', type(p))\n",
    "print('3.namedtuple既是Point也是tuple:', isinstance(p, Point), isinstance(p, tuple))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.定义的圆参数: Circle(x=2, y=3, r=4) <class '__main__.Circle'>\n"
     ]
    }
   ],
   "source": [
    "# 定义一个圆\n",
    "Circle = namedtuple('Circle', ['x', 'y', 'r'])   # 定义有意义的元组\n",
    "mycircle = Circle(2, 3, r=4)\n",
    "print('1.定义的圆参数:', mycircle, type(mycircle))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2.deque\n",
    "双向列表实现高效的插入和删除操作，使用与队列和栈"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.list操作消耗的时间:\n",
      "-1 1001\n",
      "function list_operation consumes: 0.045219414445050204 ms\n",
      "2.deque操作消耗的时间:\n",
      "-1 99\n",
      "function deque_operation consumes: 0.06564108548475031 ms\n"
     ]
    }
   ],
   "source": [
    "# 用一个装饰器测试列表操作的时间\n",
    "import time, functools\n",
    "from collections import deque\n",
    "def runtime(func): \n",
    "    @functools.wraps(func)       # 保证装饰后的函数名称前后一致,@符号别忘了加\n",
    "    def wraper(*args, **kw):\n",
    "        start_time = time.clock()     # 获取程序运行前的时间\n",
    "        func(*args, **kw)             # 调用待装饰的函数\n",
    "        execute_time = time.clock() - start_time   # 计算消耗的时间\n",
    "        print('function {} consumes: {} ms'.format(func.__name__, execute_time *1000))\n",
    "        return execute_time * 1000    # 返回消耗的时间\n",
    "    return wraper                     #  返回装饰后的函数   \n",
    "\n",
    "@runtime\n",
    "def list_operation(mylist=[]):\n",
    "    mylist = list(range(1000))\n",
    "    mylist.append(1001)     # 列表尾部添加元素\n",
    "    mylist.insert(0, -1)    # 列表头部添加元素\n",
    "    mylist.pop(233)\n",
    "    print(mylist[0], mylist[1000])\n",
    "print('1.list操作消耗的时间:')\n",
    "out = list_operation()\n",
    "\n",
    "@runtime\n",
    "def deque_operation(mydeque=[]):\n",
    "    mydeque = deque(range(1000))\n",
    "    mydeque.append(1001)      # 列表尾部添加元素\n",
    "    mydeque.appendleft(-1)    # 列表头部添加元素\n",
    "#     print(mydeque.pop()  )           # 从尾部删除一个元素\n",
    "    print(mydeque[0], mydeque[100])\n",
    "print('2.deque操作消耗的时间:')\n",
    "out = deque_operation()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.原来的序列: deque([1, 2, 3, 4, 5, 6, 7])\n",
      "2.append()尾部添加新元素: deque([1, 2, 3, 4, 5, 6, 7, 8])\n",
      "3.appendleft()头部添加新元素: deque([0, 1, 2, 3, 4, 5, 6, 7, 8])\n",
      "4.pop()尾部删除元素: deque([0, 1, 2, 3, 4, 5, 6, 7])\n",
      "5.popleft()头部删除元素: deque([1, 2, 3, 4, 5, 6, 7])\n"
     ]
    }
   ],
   "source": [
    "# deque的功能\n",
    "mydeque = deque([1,2,3,4,5,6,7])\n",
    "print('1.原来的序列:', mydeque)\n",
    "mydeque.append(8)       # 尾部添加新元素\n",
    "print('2.append()尾部添加新元素:', mydeque)\n",
    "mydeque.appendleft(0)\n",
    "print('3.appendleft()头部添加新元素:', mydeque)\n",
    "mydeque.pop()                                   # pop()中不可以加索引参数\n",
    "print('4.pop()尾部删除元素:', mydeque)\n",
    "mydeque.popleft()\n",
    "print('5.popleft()头部删除元素:', mydeque)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 3.defaultdict\n",
    "在key不存在是可以返回设定的默认值，其他行为与dict一致"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.访问存在的key1: abc\n",
      "2.访问不存在的key2，返回默认值: N/A\n"
     ]
    }
   ],
   "source": [
    "from collections import defaultdict\n",
    "dd = defaultdict(lambda: 'N/A')    # 当访问不存在的key时，返回N/A\n",
    "dd['key1'] = 'abc'\n",
    "print('1.访问存在的key1:', dd['key1'])\n",
    "print('2.访问不存在的key2，返回默认值:', dd['key2'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 4.OrderedDict\n",
    "key有一定的顺序，按插入的顺序进行排练"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.OrderedDict是有序的: OrderedDict([('a', 1), ('b', 2), ('c', 3)])\n",
      "2.为OrderedDict添加元素: OrderedDict([('a', 1), ('b', 2), ('c', 3), ('x', 4), ('y', 5), ('z', 6)])\n",
      "3.为OrderedDict连接新字典: OrderedDict([('a', 1), ('b', 2), ('c', 3), ('x', 4), ('y', 5), ('z', 6), ('o', 22), ('p', 33)])\n",
      "4.删除key为b的值: OrderedDict([('a', 1), ('c', 3), ('x', 4), ('y', 5), ('z', 6), ('o', 22), ('p', 33)])\n"
     ]
    }
   ],
   "source": [
    "from collections import OrderedDict\n",
    "dic = OrderedDict([('a', 1), ('b', 2), ('c', 3)])\n",
    "print('1.OrderedDict是有序的:', dic)\n",
    "dic['x'] = 4\n",
    "dic['y'] = 5\n",
    "dic['z'] = 6\n",
    "print('2.为OrderedDict添加元素:', dic)\n",
    "dic.update({'o':22, 'p':33})\n",
    "print('3.为OrderedDict连接新字典:', dic)\n",
    "dic.pop('b')\n",
    "print('4.删除key为b的值:', dic)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "add: ('a', 1)\n",
      "add: ('b', 2)\n",
      "add: ('c', 3)\n",
      "LastUpdateOrderedDict([('a', 1), ('b', 2), ('c', 3)])\n",
      "set: ('a', 4)\n",
      "LastUpdateOrderedDict([('b', 2), ('c', 3), ('a', 4)])\n",
      "3 0 3\n",
      "move: ('b', 2)\n",
      "add: ('d', 5)\n",
      "LastUpdateOrderedDict([('c', 3), ('a', 4), ('d', 5)])\n"
     ]
    }
   ],
   "source": [
    "# OrderedDict实现一个FIFO的dict，当容量超出时，先删除最早添加的key\n",
    "from collections import OrderedDict\n",
    "class LastUpdateOrderedDict(OrderedDict):\n",
    "    def __init__(self, capacity):\n",
    "        super(LastUpdateOrderedDict,self).__init__()   # super().__init__() python3这样写就行\n",
    "        self._capacity = capacity     \n",
    "    def __setitem__(self, key, value):\n",
    "        containsKey = 1 if key in self else 0      # 已经包含了该key\n",
    "        if len(self) - containsKey >= self._capacity:  # 超出容量限制\n",
    "            print(len(self), containsKey, self._capacity)\n",
    "            last = self.popitem(last=False)            # 将头部的数据删除\n",
    "            print('move:', last)\n",
    "        if containsKey:\n",
    "            del self[key]\n",
    "            print('set:', (key, value))\n",
    "        else:\n",
    "            print('add:', (key, value))\n",
    "        OrderedDict.__setitem__(self, key, value)      # 真正设置key-value的地方 \n",
    "dic = LastUpdateOrderedDict(3)\n",
    "dic['a'] = 1\n",
    "dic['b'] = 2\n",
    "dic['c'] = 3\n",
    "print(dic)\n",
    "dic['a'] = 4\n",
    "print(dic)\n",
    "dic['d'] = 5\n",
    "print(dic)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 5.Counter\n",
    "简单的计数器，统计字符出现的个数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.统计字符出现的次数: Counter({'r': 2, 'g': 2, 'm': 2, 'p': 1, 'o': 1, 'a': 1, 'i': 1, 'n': 1})\n"
     ]
    }
   ],
   "source": [
    "from collections import Counter\n",
    "c = Counter()                   # 以dict的形式统计字符 key-value key：字符，value：出现的次数\n",
    "for ch in 'programming':\n",
    "    c[ch] += 1\n",
    "print('1.统计字符出现的次数:', c)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3 base64\n",
    "Base64是任意二进制到文本字符串的编码方法，常用到URL、Cookie和网页中传输少量的二进制数据\n",
    "\n",
    "二进制文件中的部分内容是无法显示的，所以转到字符串才能显示"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "![编码方式](base64编码.jpg)\n",
    "\n",
    "base64对二进制数据处理：每3个字节一组，处理`3*8=24`bit，分为`4*6`的形式，每组6bit，得到4个数字作为索引，然后查表获取编码后的字符串\n",
    "\n",
    "注意：用`\\x00`字节在末尾补足，然后在末尾加1到2个`=`，表示补足的字节数，解码的时候去掉"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.编码的结果: b'YmluYXJ5c3RyaW5nXw=='\n",
      "2.解码的结果: b'binarystring_'\n"
     ]
    }
   ],
   "source": [
    "# 1. base64模块练习\n",
    "import base64\n",
    "encode_out = base64.b64encode(b'binarystring_')\n",
    "print('1.编码的结果:', encode_out)\n",
    "decode_out = base64.b64decode(encode_out)\n",
    "print('2.解码的结果:', decode_out)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.普通的base64编码: b'abcd++//'\n",
      "2.url safe的base64编码: b'abcd--__'\n",
      "3.url safe的base64解码: b'i\\xb7\\x1d\\xfb\\xef\\xff'\n"
     ]
    }
   ],
   "source": [
    "# 2. url safe base64编码，+ / 变为- _来处理\n",
    "out = base64.b64encode(b'i\\xb7\\x1d\\xfb\\xef\\xff')\n",
    "print('1.普通的base64编码:', out)\n",
    "urlsafe_out = base64.urlsafe_b64encode(b'i\\xb7\\x1d\\xfb\\xef\\xff')\n",
    "print('2.url safe的base64编码:', urlsafe_out)\n",
    "origin_out = base64.urlsafe_b64decode(urlsafe_out)\n",
    "print('3.url safe的base64解码:', origin_out)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "小结:\n",
    "- Base64编码的长度是4的倍数\n",
    "- Base64编码是使用64个字符表示二进制文件的方法，最终得到表示二进制文件的字符串"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "base64练习"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ok\n"
     ]
    }
   ],
   "source": [
    "# 去掉=的base64解码函数\n",
    "import base64\n",
    "def safe_base64_decode(s):   # 将带==或不带==的编码结果，解码得到原始的内容\n",
    "    temp = len(s) % 4        # 查看字符串长度是否是4的倍数\n",
    "    s += temp * b'='         # bytes类型的数据\n",
    "    return base64.b64decode(s)\n",
    "assert b'abcd' == safe_base64_decode(b'YWJjZA=='), safe_base64_decode('YWJjZA==')\n",
    "assert b'abcd' == safe_base64_decode(b'YWJjZA'), safe_base64_decode('YWJjZA')\n",
    "print('ok')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "bytes"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(b'a' + b'c')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4 struct\n",
    "实现bytes和其他二进制数据的转换\n",
    "\n",
    "pack()函数将任意类型数据转为bytes，unpack()函数将bytes数据还原为原数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0, 156, 64, 99]\n",
      "b'\\x00\\x9c@c'\n"
     ]
    }
   ],
   "source": [
    "# int转bytes的最原始方法\n",
    "n = 10240099\n",
    "b1 = (n & 0xff000000) >> 24\n",
    "b2 = (n & 0xff0000) >> 16\n",
    "b3 = (n & 0xff00) >> 8\n",
    "b4 = n & 0xff\n",
    "print([b1, b2, b3, b4])\n",
    "bs = bytes([b1, b2, b3, b4])\n",
    "print(bs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.转换为bytes数据的结果: b'\\x00\\x9c@c'\n",
      "2.转换为原数据的结果: (10240099,)\n",
      "3.转换为原数据的结果: (4042322160, 32896)\n"
     ]
    }
   ],
   "source": [
    "# 使用struct模块，在bytes数据和其他类型数据间转换\n",
    "import struct\n",
    "pack_out = struct.pack('>I', 10240099)         # 将整数转换为bytes\n",
    "print('1.转换为bytes数据的结果:', pack_out)\n",
    "unpack_out = struct.unpack('>I', pack_out)     # 将bytes数据还原\n",
    "print('2.转换为原数据的结果:', unpack_out)\n",
    "unpack_out = struct.unpack('>IH', b'\\xf0\\xf0\\xf0\\xf0\\x80\\x80')  # 参数个数要和处理指令一致\n",
    "print('3.转换为原数据的结果:', unpack_out)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "小结:\n",
    "- `>`表示字节顺序是big-endian，`I`是4字节无符号整数，`H`是2字节无符号整数\n",
    "- 参数个数要和处理指令一致"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "struct练习"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.前30个二进制数据: b'BM\\xf6\\x89\\x01\\x00\\x00\\x00\\x00\\x006\\x00\\x00\\x00(\\x00\\x00\\x00\\xc8\\x00\\x00\\x00\\xa8\\x00\\x00\\x00\\x01\\x00\\x18\\x00'\n",
      "2.bytes数据转换的结果: (b'B', b'M', 100854, 0, 54, 40, 200, 168, 1, 24)\n"
     ]
    }
   ],
   "source": [
    "# 1. 读取一张bmp图像，小端存储\n",
    "# 两个字节：BM表示Windows位图，BA表示OS/2位图\n",
    "with open('test_struct.bmp', 'rb') as img:\n",
    "    start_bytes = img.read()[:30]       # 取前30个字节\n",
    "    out = struct.unpack('<ccIIIIIIHH', start_bytes)\n",
    "    print('1.前30个二进制数据:', start_bytes)\n",
    "    print('2.bytes数据转换的结果:', out)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "bmp图像前30个字节包含了图像的信息：\n",
    "\n",
    "|字节数(共30)|信息|\n",
    "|-|-|\n",
    "|前2个字节|BM表示Windows位图，BA表示os/2位图|\n",
    "|4个字节|表示位图大小|\n",
    "|4个字节|保留位，始终为0|\n",
    "|4个字节|实际图像的偏移量|\n",
    "|4个字节|Header的字节数|\n",
    "|4个字节|图像宽度|\n",
    "|4个字节|图像高度|\n",
    "|2个字节|始终为1|\n",
    "|2个字节|颜色数|"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "BM\n",
      "ok\n"
     ]
    }
   ],
   "source": [
    "# 2.检查任意文件是否是位图，如果是打印大小和颜色数\n",
    "import base64, struct\n",
    "bmp_data = base64.b64decode('Qk1oAgAAAAAAADYAAAAoAAAAHAAAAAoAAAABABAAAAAAADICAAASCwAAEgsAAAAAAAAAAAAA/3//f/9//3//f/9//3//f/9//3//f/9//3//f/9//3//f/9//3//f/9//3//f/9//3//f/9//3//f/9/AHwAfAB8AHwAfAB8AHwAfP9//3//fwB8AHwAfAB8/3//f/9/AHwAfAB8AHz/f/9//3//f/9//38AfAB8AHwAfAB8AHwAfAB8AHz/f/9//38AfAB8/3//f/9//3//fwB8AHz/f/9//3//f/9//3//f/9/AHwAfP9//3//f/9/AHwAfP9//3//fwB8AHz/f/9//3//f/9/AHwAfP9//3//f/9//3//f/9//38AfAB8AHwAfAB8AHwAfP9//3//f/9/AHwAfP9//3//f/9//38AfAB8/3//f/9//3//f/9//3//fwB8AHwAfAB8AHwAfAB8/3//f/9//38AfAB8/3//f/9//3//fwB8AHz/f/9//3//f/9//3//f/9/AHwAfP9//3//f/9/AHwAfP9//3//fwB8AHz/f/9/AHz/f/9/AHwAfP9//38AfP9//3//f/9/AHwAfAB8AHwAfAB8AHwAfAB8/3//f/9/AHwAfP9//38AfAB8AHwAfAB8AHwAfAB8/3//f/9//38AfAB8AHwAfAB8AHwAfAB8/3//f/9/AHwAfAB8AHz/fwB8AHwAfAB8AHwAfAB8AHz/f/9//3//f/9//3//f/9//3//f/9//3//f/9//3//f/9//3//f/9//3//f/9//3//f/9//3//f/9//38AAA==')\n",
    "# 解码后得到原来的数据\n",
    "# print(bmp_data)\n",
    "\n",
    "def bmp_info(data):\n",
    "    data_type = data[0:2]\n",
    "    print((data_type).decode('utf-8'))   # bytes数据b'BM'可以解码为正常的字符串'BM'\n",
    "    if data_type in (b'BM', b'BA'):      # 证明是位图\n",
    "        start_bytes = bmp_data[0:30]\n",
    "        out = struct.unpack('<ccIIIIIIHH', start_bytes)\n",
    "#         print(out)\n",
    "        return {\n",
    "            'width' : out[6],\n",
    "            'height': out[7],\n",
    "            'color' : out[-1]\n",
    "        }\n",
    "# 测试\n",
    "bi = bmp_info(bmp_data)\n",
    "assert bi['width'] == 28         # 函数返回字典的用法\n",
    "assert bi['height'] == 10\n",
    "assert bi['color'] == 16\n",
    "print('ok')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "小结:\n",
    "- return可以返回一个数、列表、元组或字典"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
